Set working directory
setwd("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles")
Load libraries
library(phyloseq)
library(dplyr)
library(ggplot2)
library(viridis)
library(stringr)
library(vegan)
library(RColorBrewer)
library(BiocManager)
BiocManager::install("microbiome")
##
## The downloaded binary packages are in
## /var/folders/3x/wmkrzjt576j76t00d3dq37lhgqkkb3/T//Rtmp9CcR5B/downloaded_packages
library(microbiome)
library(knitr)
library(ggpubr)
Metaxa2
# Load data
metaxa_genus <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/metaxa_genus.txt")
# Create OTU table
OTU_metaxa <- metaxa_genus[,-1]
# Create tax table
tax_table_metaxa <- data.frame(str_split_fixed(data.frame(metaxa_genus) [,1], ";", 6))
colnames(tax_table_metaxa) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus")
# Combine into phyloseq object
metaxa_PHY <- phyloseq(otu_table(OTU_metaxa, taxa_are_rows=TRUE),
tax_table(as.matrix(tax_table_metaxa)), sample_data(metadata))
Filter samples
# With low quality (BFH24_S142, BH63_S118, FH10_S171)
metaxa_PHY = subset_samples(metaxa_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")
# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
metaxa_PHY_ww = subset_samples(metaxa_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")
Ordinate Metaxa2 result to study wheather the taxonomy can be explained by the variable “country”
# Change into relative data
metaxa_rel_PHY <- transform_sample_counts(metaxa_PHY_ww, function(x) x/sum(x))
# Ordinate and plot data
metaxa_rel_PHY_ord <- ordinate(metaxa_rel_PHY, method = "PCoA", distance = "horn")
p1 <- plot_ordination(metaxa_rel_PHY, metaxa_rel_PHY_ord, color = "country", label = "name")
metaxa.p1 <- p1 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) + geom_point(colour = "black",
pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.9, linetype = 1) +
theme_minimal() + labs(title = "Metaxa2")
metaxa.p1

# Test significance using pair-wise adonis
metaxa_temp <- subset_samples(metaxa_PHY_ww, (country == "Benin" | country == "Finland"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.9415 1.94145 9.0971 0.15658 0.001 ***
## Residuals 49 10.4573 0.21341 0.84342
## Total 50 12.3988 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaxa_temp <- subset_samples(metaxa_PHY_ww, (country == "Burkina Faso" | country == "Finland"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.0193 1.01926 4.737 0.09524 0.001 ***
## Residuals 45 9.6826 0.21517 0.90476
## Total 46 10.7019 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaxa_temp <- subset_samples(metaxa_PHY_ww, (country == "Benin" | country == "Burkina Faso"))
metaxa_dist <- vegdist(t(otu_table(metaxa_temp)), dist = "horn")
adonis(metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaxa_dist ~ country, data = data.frame(sample_data(metaxa_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.8664 0.86641 3.8123 0.0466 0.001 ***
## Residuals 78 17.7271 0.22727 0.9534
## Total 79 18.5935 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The explanatory variable "country" explains some (~15.7 %) of the difference in the grouping of taxonomy in the comparison between of Benin and Finland.
Metaphlan3
# Modify data in command-line to match sample names in metadata file
## tr '|' ';' <merged_abundance_table.txt > mod_merged_abundance_table.txt
## sed -i 's/_profile//g' mod_merged_abundance_table.txt
## sed -i 's/BFH38-A_S156/BFH38.A_S156/g' mod_merged_abundance_table.txt
## sed -i 's/BFH38-B_S157/BFH38.B_S157/g' mod_merged_abundance_table.txt
## sed -i 's/BH34-A_S98/BH34.A_S98/g' mod_merged_abundance_table.txt
## sed -i 's/BH34-B_S99/BH34.B_S99/g' mod_merged_abundance_table.txt
# Load data
metaphlan <- as.matrix(read.table("mod_merged_abundance_table.txt", fill = 1, header = T, check.names = F))
# Exclude the first column "NCBI_tax_id"
metaphlan <- subset(metaphlan, select = -c(NCBI_tax_id))
# Create OTU table
OTU_metaphlan <- metaphlan[,-1]
# Change values as numeric
class(OTU_metaphlan) <- "numeric"
# Create tax_table
tax_table_metaphlan <- data.frame(str_split_fixed(data.frame(metaphlan) [,1], ";", 7))
colnames(tax_table_metaphlan) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
# Clean "*__"
tax_table_metaphlan <- apply(tax_table_metaphlan, 2, function(y) (gsub(".__", "", y)))
# Combine into phyloseq object
metaphlan_PHY <- phyloseq(otu_table(OTU_metaphlan, taxa_are_rows = TRUE), tax_table(as.matrix(tax_table_metaphlan)), sample_data(metadata))
Filter smaples
# Virus
metaphlan_PHY <- subset_taxa(metaphlan_PHY, Kingdom != "Virus")
# With low quality (BFH24_S142, BH63_S118, FH10_S171)
metaphlan_PHY = subset_samples(metaxa_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")
# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
metaphlan_PHY_ww = subset_samples(metaphlan_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")
Ordinate Metaphlan3 results to study wheather the taxonomy can be explained by the variable “country”
# Ordinate and plot data
metaphlan_PHY_ord <- ordinate(metaphlan_PHY_ww, method = "PCoA", distance = "horn")
p2 <- plot_ordination(metaphlan_PHY_ww, metaphlan_PHY_ord, color = "country", label = "name")
metaphlan.p2 <- p2 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) +
geom_point(colour = "black", pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.9, linetype = 1) +
theme_minimal() + labs(title = "Metaphlan3")
metaphlan.p2

# Test significance using pair-wise adonis
metaphlan_temp <- subset_samples(metaphlan_PHY_ww, (country == "Benin" | country == "Finland"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.9415 1.94145 9.0971 0.15658 0.001 ***
## Residuals 49 10.4573 0.21341 0.84342
## Total 50 12.3988 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaphlan_temp <- subset_samples(metaphlan_PHY_ww, (country == "Burkina Faso" | country == "Finland"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.0193 1.01926 4.737 0.09524 0.001 ***
## Residuals 45 9.6826 0.21517 0.90476
## Total 46 10.7019 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metaphlan_temp <- subset_samples(metaphlan_PHY_ww, (country == "Burkina Faso" | country == "Benin"))
metaphlan_dist <- vegdist(t(otu_table(metaphlan_temp)), dist = "horn")
adonis(metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Call:
## adonis(formula = metaphlan_dist ~ country, data = data.frame(sample_data(metaphlan_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.8664 0.86641 3.8123 0.0466 0.001 ***
## Residuals 78 17.7271 0.22727 0.9534
## Total 79 18.5935 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The results are almost identical to the ones from Metaxa2 analysis: The explanatory variable "country" explains some (~15.7 %) of the difference in the grouping of taxonomy in the comparison between of Benin and Finland.
ResFinder
Load data
# Modify mapping output file "ARG_genemat.txt" in command line to match sample names in metadata file
## sed 's/BFH38-A_S156/BFH38.A_S156/g' ARG_genemat.txt > mod_ARG_genemat.txt
## sed -i 's/BFH38-B_S157/BFH38.B_S157/g' mod_ARG_genemat.txt
## sed -i 's/BH34-A_S98/BH34.A_S98/g' mod_ARG_genemat.txt
## sed -i 's/BH34-B_S99/BH34.B_S99/g' mod_ARG_genemat.txt
ARG_genemat <-as.matrix(read.table("mod_ARG_genemat.txt", header= T, check.names = F, row.names = 1))
Load ResFinder gene lengths
# Modify "resfinder.fasta" file so that only hits remain
## seqkit grep -f ARG_genes.txt resfinder.fasta > filtered_resfinder.fasta
# Check if there are correct number of lines
## grep ">" filtered_resfinder.fasta | wc -l
# Print out the gene lengths of these genes into file "lengths_resfinder.txt"
## cat filtered_resfinder.fasta | awk '$0 ~ ">" {if (NR > 1) {print c;} c=0;printf substr($0,2,100) "\t"; } $0 !~ ">" {c+=length($0);} END { print c; }' > lengths_resfinder.txt
# Reorder in excel to match with file "ARG_genemat_norm"
lengths_resfinder <-as.matrix(read.table("lengths_resfinder.txt", header= F, check.names = T, row.names = 1))
colnames(lengths_resfinder) <- c("Length")
HC normalization
# Divide by ResFinder hit gene lengths
ARG_length_norm <- ARG_genemat/lengths_resfinder[, 1]
# Divide by SSU counts and normalize to bacterial 16S rRNA length (1550)
ARG_genemat_norm <- t(t(ARG_length_norm)/metadata$SSU_counts) * 1550
# Check if correct:
identical(ARG_genemat[2020, 4]/metadata$SSU_counts[4], ARG_genemat_norm[2020, 4])
## [1] TRUE
# Save and load again to exclude row.names
write.table(ARG_genemat_norm, "~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=T, sep = "\t", col.names = T)
ARG_genemat_norm <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=NULL)
ARG_genemat_norm$row.names<-NULL
Normalize ARG counts to SSU
ARG_genemat_norm <- t(t(ARG_genemat)/metadata$SSU_counts)
# Check if correct:
identical(ARG_genemat[2020, 4]/metadata$SSU_counts[10], ARG_genemat_norm[2020, 4])
## [1] TRUE
# Save and load again to exclude row.names
write.table(ARG_genemat_norm, "~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=T, sep = "\t", col.names = T)
ARG_genemat_norm <- read.delim("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/ARG_genemat_norm.txt", row.names=NULL)
ARG_genemat_norm$row.names<-NULL
Create phyloseq object
# Create tax table
tax_table_resfinder <- read.csv("~/Documents/Metagenomes_AMRIWA/R/AMRIWA/RFiles/tax_table_resfinder.txt", header=FALSE, sep=";")
colnames(tax_table_resfinder) <- c("Gene", "Class")
# Combine to phyloseq object
resfinder_PHY <- phyloseq(otu_table(ARG_genemat_norm, taxa_are_rows = TRUE), sample_data(metadata),
tax_table(as.matrix(tax_table_resfinder)))
Filter samples
# With low quality (BFH24_S142, BH63_S118, FH10_S171)
resfinder_PHY = subset_samples(resfinder_PHY, name != "BFH24_S142" & name != "BH63_S118" & name != "FH10_S171")
# Feces samples (BH20_S88, BH22_S89, BH24_S90, BH25_S91)
resfinder_PHY_ww = subset_samples(resfinder_PHY, name!="BH20_S88" & name!="BH22_S89" & name!="BH24_S90" & name!="BH25_S91")
Ordinate ResFinder mapping results to study wheather the taxonomy can be explained by the variable “country”
# Ordinate and plot data
resfinder_PHY_ord <- ordinate(resfinder_PHY_ww, method = "PCoA", distance = "horn")
p3 <- plot_ordination(resfinder_PHY_ww, resfinder_PHY_ord, color = "country", label = "name")
resfinder.p3 <- p3 + scale_color_manual(values=c("#FF333F", "#35E0F5", "#531592")) +
geom_point(colour = "black", pch = 21, size = 2, alpha = 0.5) + stat_ellipse(level = 0.9, linetype = 1) +
theme_minimal() + labs(title = "ResFinder")
resfinder.p3

# Test significance between all pairs
resfinder_temp <- subset_samples(resfinder_PHY_ww, (country == "Finland" | country == "Benin"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.561 1.56098 5.4817 0.10062 0.001 ***
## Residuals 49 13.953 0.28476 0.89938
## Total 50 15.514 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resfinder_temp <- subset_samples(resfinder_PHY_ww, (country == "Burkina Faso" | country == "Benin"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 0.8742 0.87417 3.2986 0.04057 0.004 **
## Residuals 78 20.6710 0.26501 0.95943
## Total 79 21.5452 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resfinder_temp <- subset_samples(resfinder_PHY_ww, (country == "Finland" | country == "Burkina Faso"))
resfinder_dist <- vegdist(t(otu_table(resfinder_temp)), dist = "horn")
adonis(resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Call:
## adonis(formula = resfinder_dist ~ country, data = data.frame(sample_data(resfinder_temp), permutations = 9999))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## country 1 1.4168 1.41684 6.4565 0.12547 0.001 ***
## Residuals 45 9.8750 0.21945 0.87453
## Total 46 11.2919 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The explanatory variable "country" explains some (~10.1 % [HC: ~10.2 %]) of the difference in the grouping of taxonomy in the comparison between of Benin and Finland and some (~12.5 % [HC: ~12.9 %]) between Burkina Faso and Finland.
Diversity indexes by Metaxa2
# Take a look at the indexes (now not the relative data, we'll see...)
metaxa_tab <-microbiome::alpha(metaxa_PHY_ww, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaxa_tab))
| BFH10_S128 |
760 |
996.5654 |
22.24596 |
0.9550480 |
4.081135 |
134.4811 |
8 |
0.8704043 |
0.6152479 |
0.0292710 |
0.2217539 |
0.2167769 |
0.1233846 |
0.2096778 |
4707 |
0.1233846 |
0.0449520 |
0.9757005 |
0.9735181 |
2.061394 |
0.1459540 |
0.0042989 |
| BFH11_S129 |
834 |
1134.7348 |
19.13635 |
0.9477434 |
4.081370 |
149.1305 |
7 |
0.8896129 |
0.6067839 |
0.0229453 |
0.2232231 |
0.2180059 |
0.1565893 |
0.2524138 |
6244 |
0.1565893 |
0.0522566 |
0.9798621 |
0.9715648 |
2.061395 |
0.1546583 |
0.0047649 |
| BFH12_S130 |
706 |
961.1532 |
16.79643 |
0.9404635 |
3.909282 |
129.7812 |
6 |
0.9210474 |
0.5959620 |
0.0237910 |
0.2328774 |
0.2198822 |
0.1629555 |
0.2622670 |
4852 |
0.1629555 |
0.0595365 |
0.9830730 |
0.9755161 |
2.061385 |
0.1430395 |
0.0043325 |
| BFH13_S131 |
671 |
845.3368 |
14.20372 |
0.9295959 |
3.740362 |
109.8236 |
5 |
0.8966893 |
0.5746650 |
0.0211680 |
0.1973915 |
0.2068136 |
0.1725040 |
0.3283068 |
8511 |
0.1725040 |
0.0704041 |
0.9748267 |
0.9798373 |
2.061330 |
0.1261502 |
0.0039321 |
| BFH14_S132 |
665 |
928.1386 |
25.68127 |
0.9610611 |
4.130331 |
114.6507 |
10 |
0.7772335 |
0.6354564 |
0.0386184 |
0.1981389 |
0.2150153 |
0.1175224 |
0.2025528 |
4438 |
0.1175224 |
0.0389389 |
0.9744724 |
0.9764716 |
2.061387 |
0.1255991 |
0.0072558 |
| BFH15_S133 |
698 |
878.7059 |
15.11920 |
0.9338589 |
4.018954 |
118.9171 |
9 |
0.8846212 |
0.6137476 |
0.0216607 |
0.2001027 |
0.2168680 |
0.2284483 |
0.2854829 |
9593 |
0.2284483 |
0.0661411 |
0.9474662 |
0.9755203 |
2.061316 |
0.1207849 |
0.0298628 |
# Create data table
metaxa.meta <- meta(metaxa_PHY_ww)
kable(head(metaxa.meta))
| BFH10_S128 |
BFH10_S128 |
31970832 |
Burkina Faso |
Clinique SOUKA |
waste water |
delivery room, surgery |
| BFH11_S129 |
BFH11_S129 |
31323748 |
Burkina Faso |
Clinique SOUKA |
waste water |
laboratory |
| BFH12_S130 |
BFH12_S130 |
24455299 |
Burkina Faso |
Clinique SOUKA |
waste water |
peadiatrics |
| BFH13_S131 |
BFH13_S131 |
31789060 |
Burkina Faso |
Source de Vie, Ouagadougou |
waste water |
delivery room |
| BFH14_S132 |
BFH14_S132 |
33699519 |
Burkina Faso |
Source de Vie, Ouagadougou |
waste water |
hospitalization, maternity |
| BFH15_S133 |
BFH15_S133 |
30866760 |
Burkina Faso |
Source de Vie, Ouagadougou |
waste water |
hospitalization, maternity, surgery |
# Add indexes to data table
## Shannon
metaxa.meta$diversity_shannon <- metaxa_tab$diversity_shannon
## Gini Simpson
metaxa.meta$diversity_gini_simpson <- metaxa_tab$diversity_gini_simpson
## Inverse Simpson
metaxa.meta$diversity_inverse_simpson <- metaxa_tab$diversity_inverse_simpson
# Check whether the distribution of the diversity is normal
hist(metaxa.meta$diversity_shannon)

shapiro.test(metaxa.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
##
## Shapiro-Wilk normality test
##
## data: metaxa.meta$diversity_shannon
## W = 0.93911, p-value = 0.0004099
qqnorm(metaxa.meta$diversity_shannon)

hist(metaxa.meta$diversity_gini_simpson)

shapiro.test(metaxa.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
##
## Shapiro-Wilk normality test
##
## data: metaxa.meta$diversity_gini_simpson
## W = 0.68269, p-value = 1.428e-12
qqnorm(metaxa.meta$diversity_gini_simpson)

hist(metaxa.meta$diversity_inverse_simpson)

shapiro.test(metaxa.meta$diversity_inverse_simpson)
##
## Shapiro-Wilk normality test
##
## data: metaxa.meta$diversity_inverse_simpson
## W = 0.98082, p-value = 0.2121
qqnorm(metaxa.meta$diversity_inverse_simpson)

# Create a list of pairwise comparisons
div.country <- levels(metaxa.meta$country)
country.pairs <- combn(seq_along(div.country), 2, simplify = FALSE, FUN = function(i)div.country[i])
print(country.pairs)
## [[1]]
## [1] "Benin" "Burkina Faso"
##
## [[2]]
## [1] "Benin" "Finland"
##
## [[3]]
## [1] "Burkina Faso" "Finland"
# Plot
## Shannon
p4 <- ggviolin(metaxa.meta, x = "country", y = "diversity_shannon",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p4 <- p4 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p4)

## Gini Simpson
p5 <- ggviolin(metaxa.meta, x = "country", y = "diversity_gini_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p5 <- p5 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p5)

# The probability that two individuals randomly selected from a sample will belong to same species is higher in Finland than in Benin.
## Inverse Simpson
p6 <- ggviolin(metaxa.meta, x = "country", y = "diversity_inverse_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p6 <- p6 + stat_compare_means(comparisons = country.pairs, method = "t.test")
print(p6)

# The alpha diversity is the highest in Benin compared to Burkina Faso and Finland. Of Burkina Faso and Finland the diversity is higher in Burkina Faso.
Diversity indexes by Metaphlan3
# Take a look at the indexes
metaphlan_tab <-microbiome::alpha(metaphlan_PHY_ww, index = "all")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(metaphlan_tab))
| BFH10_S128 |
760 |
996.5654 |
22.24596 |
0.9550480 |
4.081135 |
134.4811 |
8 |
0.8704043 |
0.6152479 |
0.0292710 |
0.2217539 |
0.2167769 |
0.1233846 |
0.2096778 |
4707 |
0.1233846 |
0.0449520 |
0.9757005 |
0.9735181 |
2.061394 |
0.1459540 |
0.0042989 |
| BFH11_S129 |
834 |
1134.7348 |
19.13635 |
0.9477434 |
4.081370 |
149.1305 |
7 |
0.8896129 |
0.6067839 |
0.0229453 |
0.2232231 |
0.2180059 |
0.1565893 |
0.2524138 |
6244 |
0.1565893 |
0.0522566 |
0.9798621 |
0.9715648 |
2.061395 |
0.1546583 |
0.0047649 |
| BFH12_S130 |
706 |
961.1532 |
16.79643 |
0.9404635 |
3.909282 |
129.7812 |
6 |
0.9210474 |
0.5959620 |
0.0237910 |
0.2328774 |
0.2198822 |
0.1629555 |
0.2622670 |
4852 |
0.1629555 |
0.0595365 |
0.9830730 |
0.9755161 |
2.061385 |
0.1430395 |
0.0043325 |
| BFH13_S131 |
671 |
845.3368 |
14.20372 |
0.9295959 |
3.740362 |
109.8236 |
5 |
0.8966893 |
0.5746650 |
0.0211680 |
0.1973915 |
0.2068136 |
0.1725040 |
0.3283068 |
8511 |
0.1725040 |
0.0704041 |
0.9748267 |
0.9798373 |
2.061330 |
0.1261502 |
0.0039321 |
| BFH14_S132 |
665 |
928.1386 |
25.68127 |
0.9610611 |
4.130331 |
114.6507 |
10 |
0.7772335 |
0.6354564 |
0.0386184 |
0.1981389 |
0.2150153 |
0.1175224 |
0.2025528 |
4438 |
0.1175224 |
0.0389389 |
0.9744724 |
0.9764716 |
2.061387 |
0.1255991 |
0.0072558 |
| BFH15_S133 |
698 |
878.7059 |
15.11920 |
0.9338589 |
4.018954 |
118.9171 |
9 |
0.8846212 |
0.6137476 |
0.0216607 |
0.2001027 |
0.2168680 |
0.2284483 |
0.2854829 |
9593 |
0.2284483 |
0.0661411 |
0.9474662 |
0.9755203 |
2.061316 |
0.1207849 |
0.0298628 |
# Create data table
metaphlan.meta <- meta(metaphlan_PHY_ww)
kable(head(metaxa.meta))
| BFH10_S128 |
BFH10_S128 |
31970832 |
Burkina Faso |
Clinique SOUKA |
waste water |
delivery room, surgery |
4.081135 |
0.9550480 |
22.24596 |
| BFH11_S129 |
BFH11_S129 |
31323748 |
Burkina Faso |
Clinique SOUKA |
waste water |
laboratory |
4.081370 |
0.9477434 |
19.13635 |
| BFH12_S130 |
BFH12_S130 |
24455299 |
Burkina Faso |
Clinique SOUKA |
waste water |
peadiatrics |
3.909282 |
0.9404635 |
16.79643 |
| BFH13_S131 |
BFH13_S131 |
31789060 |
Burkina Faso |
Source de Vie, Ouagadougou |
waste water |
delivery room |
3.740362 |
0.9295959 |
14.20372 |
| BFH14_S132 |
BFH14_S132 |
33699519 |
Burkina Faso |
Source de Vie, Ouagadougou |
waste water |
hospitalization, maternity |
4.130331 |
0.9610611 |
25.68127 |
| BFH15_S133 |
BFH15_S133 |
30866760 |
Burkina Faso |
Source de Vie, Ouagadougou |
waste water |
hospitalization, maternity, surgery |
4.018954 |
0.9338589 |
15.11920 |
# Add indexes to data table
## Shannon
metaphlan.meta$diversity_shannon <- metaphlan_tab$diversity_shannon
## Gini Simpson
metaphlan.meta$diversity_gini_simpson <- metaphlan_tab$diversity_gini_simpson
## Inverse Simpson
metaphlan.meta$diversity_inverse_simpson <- metaphlan_tab$diversity_inverse_simpson
# Check whether the distribution of the diversity is normal
hist(metaphlan.meta$diversity_shannon)

shapiro.test(metaphlan.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
##
## Shapiro-Wilk normality test
##
## data: metaphlan.meta$diversity_shannon
## W = 0.93911, p-value = 0.0004099
qqnorm(metaphlan.meta$diversity_shannon)

hist(metaphlan.meta$diversity_gini_simpson)

shapiro.test(metaphlan.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
##
## Shapiro-Wilk normality test
##
## data: metaphlan.meta$diversity_gini_simpson
## W = 0.68269, p-value = 1.428e-12
qqnorm(metaphlan.meta$diversity_gini_simpson)

hist(metaphlan.meta$diversity_inverse_simpson)

shapiro.test(metaphlan.meta$diversity_inverse_simpson)
##
## Shapiro-Wilk normality test
##
## data: metaphlan.meta$diversity_inverse_simpson
## W = 0.98082, p-value = 0.2121
qqnorm(metaxa.meta$diversity_inverse_simpson)

# Plot
## Shannon
p7 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_shannon",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p7 <- p7 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p7)

## Gini Simpson
p8 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_gini_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p8 <- p8 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p8)

# The outlier samole in here is FH5_S166
## Inverse Simpson
p9 <- ggviolin(metaphlan.meta, x = "country", y = "diversity_inverse_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p9 <- p9 + stat_compare_means(comparisons = country.pairs, method = "t.test")
print(p9)

# The results are identical to those from Metaxa2.
Diversity indexes by ResFinder
# Shannon
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_ww, index = "diversity_shannon")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
| BFH12_S130 |
5.247442 |
| BH09_S83 |
4.809859 |
| BFH35_S153 |
5.921721 |
| BSE93_S121 |
5.053210 |
| BH47_S108 |
4.892473 |
| BH52_S112 |
4.670272 |
## Create data table
resfinder.meta <- meta(resfinder_PHY_ww)
kable(head(resfinder.meta))
| BFH12_S130 |
BFH12_S130 |
24455299 |
Burkina Faso |
Clinique SOUKA |
waste water |
peadiatrics |
29775 |
| BH09_S83 |
BH09_S83 |
24778124 |
Benin |
Hopital de Zone Calavi |
waste water |
surgery room of demanding operations, sump |
30132 |
| BFH35_S153 |
BFH35_S153 |
30094227 |
Burkina Faso |
Yalgado teaching hospital of Ouagadougou |
waste water |
operating room, neurosurgery, traumatology |
75529 |
| BSE93_S121 |
BSE93_S121 |
37658434 |
Benin |
Savalou area |
tap water |
drinking water in Aglomodjodji village |
40428 |
| BH47_S108 |
BH47_S108 |
36833527 |
Benin |
Hospital Saint Jean |
waste water |
hospitalization ward, vaccination room septic tank |
47007 |
| BH52_S112 |
BH52_S112 |
36821916 |
Benin |
Hospital Saint Jean |
clean water |
tank for washing hands |
25959 |
## Add indexes to data table
resfinder.meta$diversity_shannon <- resfinder_tab$diversity_shannon
# Check whether the distribution of the diversity is normal
hist(resfinder.meta$diversity_shannon)

shapiro.test(resfinder.meta$diversity_shannon) # Does not fit the assumption of normal distribution > wilcox test.
##
## Shapiro-Wilk normality test
##
## data: resfinder.meta$diversity_shannon
## W = 0.83837, p-value = 1.923e-08
qqnorm(resfinder.meta$diversity_shannon)

# Plot
p10 <- ggviolin(resfinder.meta, x = "country", y = "diversity_shannon",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p10 <- p10 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p10)

# Gini Simpson
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_ww, index = "diversity_gini_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
| BFH12_S130 |
0.9894870 |
| BH09_S83 |
0.9835953 |
| BFH35_S153 |
0.9942990 |
| BSE93_S121 |
0.9820126 |
| BH47_S108 |
0.9830211 |
| BH52_S112 |
0.9732776 |
## Add indexes to data table
resfinder.meta$diversity_gini_simpson <- resfinder_tab$diversity_gini_simpson
# Check whether the distribution of the diversity is normal
hist(resfinder.meta$diversity_gini_simpson)

shapiro.test(resfinder.meta$diversity_gini_simpson) # Does not fit the assumption of normal distribution > wilcox test.
##
## Shapiro-Wilk normality test
##
## data: resfinder.meta$diversity_gini_simpson
## W = 0.36077, p-value < 2.2e-16
qqnorm(resfinder.meta$diversity_gini_simpson)

# Plot
p11 <- ggviolin(resfinder.meta, x = "country", y = "diversity_gini_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p11 <- p11 + stat_compare_means(comparisons = country.pairs, method = "wilcox.test")
print(p11)

# The outlier sample here is FH6-S167
# Inverse Simpson
## Take a look at the indexes
resfinder_tab <-microbiome::alpha(resfinder_PHY_ww, index = "diversity_inverse_simpson")
## Observed richness
## Other forms of richness
## Diversity
## Evenness
## Dominance
## Rarity
kable(head(resfinder_tab))
| BFH12_S130 |
95.12045 |
| BH09_S83 |
60.95816 |
| BFH35_S153 |
175.40708 |
| BSE93_S121 |
55.59435 |
| BH47_S108 |
58.89676 |
| BH52_S112 |
37.42173 |
## Add indexes to data table
resfinder.meta$diversity_inverse_simpson <- resfinder_tab$diversity_inverse_simpson
# Check whether the distribution of the diversity is normal
hist(metaphlan.meta$diversity_inverse_simpson)

shapiro.test(metaphlan.meta$diversity_inverse_simpson)
##
## Shapiro-Wilk normality test
##
## data: metaphlan.meta$diversity_inverse_simpson
## W = 0.98082, p-value = 0.2121
qqnorm(metaxa.meta$diversity_inverse_simpson)

# Plot
p12 <- ggviolin(resfinder.meta, x = "country", y = "diversity_inverse_simpson",
add = "boxplot", fill = "country", palette = c("#FF333F", "#35E0F5", "#531592"))
p12 <- p12 + stat_compare_means(comparisons = country.pairs, method = "t.test")
print(p12)

# The alpha diversity of resistance genes deteced by ResFinder seems slightly higher in Burkina Faso than in Benin and in Finland.